# Run this script in a fresh R session (e.g. via `Rscript` or "Restart R").
# The workspace is intentionally not wiped with rm(list = ls()): that call
# deletes the caller's objects when the script is sourced, yet leaves attached
# packages and options untouched, so it does not provide a clean state anyway.

# load packages
library(openxlsx)

# load data set with chapter lengths and crime numbers
# (first sheet of the workbook; the path is relative to the working directory)
dat <- openxlsx::read.xlsx("vsberichte_metadata.xlsx", sheet = 1)

# Binary variable indicating whether the interior minister belongs to a
# center-right party: 1 for "cdu", "csu" or "fdp"; NA for "pro" (and for a
# missing party code); 0 for every other party code.
dat$cr_intmin <- ifelse(
  dat$intmin_party %in% c("cdu", "csu", "fdp"),
  1,
  ifelse(dat$intmin_party == "pro", NA, 0)
)

# Keep only the rows that received a 0/1 classification above, i.e. drop
# those without a center-right or center-left interior minister.
dat <- dat[!is.na(dat[["cr_intmin"]]), , drop = FALSE]

# Chapter lengths, computed as end page minus start page, for the chapters on
# right-wing (rwe) and left-wing (lwe) extremism.
dat[["rwe_chapter_length"]] <- with(dat, page_right_end - page_right_start)
dat[["lwe_chapter_length"]] <- with(dat, page_left_end - page_left_start)

# Gap between the two chapter lengths (right-wing minus left-wing).
dat[["rwe_lwe_chapter_diff"]] <- with(
  dat,
  rwe_chapter_length - lwe_chapter_length
)

# Drop rows for which the gap is missing.
dat <- dat[!is.na(dat[["rwe_lwe_chapter_diff"]]), , drop = FALSE]

# Difference in crime counts (right-wing minus left-wing) for each row.
dat$rwe_lwe_crime_diff <- dat$extreme_crime_right - dat$extreme_crime_left

# Federal-level ("bund") crime counts per year, merged back onto every row of
# the same year. `%in%` is used instead of `==` so that a missing jurisdiction
# counts as "not federal"; with `==` an NA would inject an all-NA row into
# `crime_fed`, which merge() could then match against rows with a missing year.
# NOTE(review): `dat` has already been row-filtered at this point, so federal
# figures are only available for years whose federal row survived those
# filters -- confirm this is intended.
# NOTE(review): merge() returns one row per match; this assumes at most one
# "bund" row per year -- verify against the data.
crime_fed <- dat[
  dat$jurisdiction %in% "bund",
  c("year", "extreme_crime_right", "extreme_crime_left")
]
names(crime_fed) <- c(
  "year",
  "extreme_crime_right_fed",
  "extreme_crime_left_fed"
)
crime_fed$rwe_lwe_crime_diff_fed <-
  crime_fed$extreme_crime_right_fed - crime_fed$extreme_crime_left_fed

# Left join: keep every row of `dat`, even for years without a federal row.
dat <- merge(dat, crime_fed, by = "year", all.x = TRUE)

# Impute a missing crime difference with the federal-level value of that year.
dat$rwe_lwe_crime_diff_fedimp <- ifelse(
  is.na(dat$rwe_lwe_crime_diff),
  dat$rwe_lwe_crime_diff_fed,
  dat$rwe_lwe_crime_diff
)

# Bias measure: the chapter-length difference minus the (federally imputed)
# crime difference, each divided by its own standard deviation first. Missing
# values are ignored when the standard deviations are computed.
dat$bias_diff_fedimp <- with(dat, {
  chapter_scaled <- rwe_lwe_chapter_diff /
    sd(rwe_lwe_chapter_diff, na.rm = TRUE)
  crime_scaled <- rwe_lwe_crime_diff_fedimp /
    sd(rwe_lwe_crime_diff_fedimp, na.rm = TRUE)
  chapter_scaled - crime_scaled
})

# Decade indicator: every year before 1960 is labelled 1950; years from 1960
# up to (but not including) 2030 get the first year of their decade; anything
# else (later or missing years) stays NA.
dat$decade <- local({
  yr <- dat$year
  label <- rep(NA, length(yr))
  label[yr < 1960] <- 1950
  for (lower in seq(1960, 2020, by = 10)) {
    label[yr >= lower & yr < lower + 10] <- lower
  }
  label
})

## regression models
# Every model regresses an outcome on the center-right interior minister
# indicator; later specifications add jurisdiction dummies and then either
# decade or year dummies.

# position: outcome is the rwe-minus-lwe chapter length difference
mod_1 <- lm(
  formula = rwe_lwe_chapter_diff ~ factor(cr_intmin),
  data = dat
)
mod_2 <- lm(
  formula = rwe_lwe_chapter_diff ~ factor(cr_intmin) + factor(jurisdiction),
  data = dat
)
mod_3 <- lm(
  formula = rwe_lwe_chapter_diff ~ factor(cr_intmin) + factor(jurisdiction) +
    factor(decade),
  data = dat
)
mod_4 <- lm(
  formula = rwe_lwe_chapter_diff ~ factor(cr_intmin) + factor(jurisdiction) +
    factor(year),
  data = dat
)

# bias: outcome is the normalized chapter difference minus crime difference
mod_5 <- lm(
  formula = bias_diff_fedimp ~ factor(cr_intmin),
  data = dat
)
mod_6 <- lm(
  formula = bias_diff_fedimp ~ factor(cr_intmin) + factor(jurisdiction),
  data = dat
)
mod_7 <- lm(
  formula = bias_diff_fedimp ~ factor(cr_intmin) + factor(jurisdiction) +
    factor(decade),
  data = dat
)
mod_8 <- lm(
  formula = bias_diff_fedimp ~ factor(cr_intmin) + factor(jurisdiction) +
    factor(year),
  data = dat
)

